#coding=utf-8

import os
import time
import re
from collections import Counter


def puncfilter(line):
    """Remove punctuation, symbols and a fixed set of emoji from ``line``.

    Every run of characters listed in the character class below is deleted
    first; any backslashes still present afterwards are removed in a second
    step. The cleaned string is returned.
    """
    unwanted = u'[’!"#$%&\'()*+,-./:;<=>?@；；：．｜～\≧▽—°❄×🍀🐾🍓🐋▲♥♀☀●巜「」☕／↓→<=>?@⁄•ω★💊🙈☕💰😂·、…★、​…【】《》『』（）？“”‘’！[\\]^_`{|}~]+'
    return re.sub(unwanted, '', line).replace('\\', '')

# iPhone model names recognised by platformUni, most specific first so that
# e.g. "iPhone 6s Plus" is not reported as "iPhone 6"; the bare "iPhone"
# entry is the catch-all for every other iPhone string.
_IPHONE_MODELS = (
    "iPhone 5s", "iPhone 5c", "iPhone 5",
    "iPhone 6 Plus", "iPhone 6s Plus", "iPhone 6s", "iPhone 6",
    "iPhone 7 Plus", "iPhone 7", "iPhone SE", "iPhone",
)

# Ordered (keyword, canonical prefix, keep_suffix) rules used by platformUni.
# The first keyword found in the platform string wins, so the order matters
# (e.g. '360手机' has to be tested before the more general '360').
_PLATFORM_RULES = (
    ('iOS', "iPhone", False),
    ('Android', "Android", True),
    ('iPad', "iPad", True),
    ('360手机', "360手机", False),
    ('魅族', "魅族", True),
    ('MEIZU', "魅族", True),
    ('魅蓝', "魅族 魅蓝", True),
    ('Galaxy', "三星 Galaxy", True),
    ('GALAXY', "三星 Galaxy", True),
    ('Samsung', "三星", True),
    ('360', "360", True),
    ('小米', "小米", True),
    ('红米', "小米 红米", True),
    ('xiaomi', "小米", True),
    ('荣耀', "华为荣耀", True),
    ('vivo', "vivo", True),
    ('HUAWEI', "华为", True),
    ('OnePlus', "一加", True),
    ('Smartisan', "锤子", True),
    ('坚果', "锤子 坚果", True),
    ('Xperia', "索尼 Xperia", True),
)


def platformUni(platform):
    """Normalise a raw platform/device string to a unified brand-prefixed form.

    Args:
        platform: the raw platform string.

    Returns:
        * for strings containing ``iPhone``: the first matching entry of
          ``_IPHONE_MODELS`` (the bare ``"iPhone"`` when no model matches);
        * otherwise, for the first keyword of ``_PLATFORM_RULES`` found in
          ``platform``: the canonical prefix, followed (when the rule keeps a
          suffix) by the text after the first occurrence of the keyword up to
          its next occurrence;
        * ``platform`` unchanged when no keyword matches.
    """
    if 'iPhone' in platform:
        for model in _IPHONE_MODELS:
            if model in platform:
                return model

    for keyword, prefix, keep_suffix in _PLATFORM_RULES:
        if keyword not in platform:
            continue
        if not keep_suffix:
            return prefix
        # Split on the keyword that actually matched; the 'xiaomi' rule must
        # not split on '小米', which is absent from such strings.
        return prefix + platform.split(keyword)[1]

    return platform

def platformSimp(platform):
    """Collapse a platform string to a single brand/family name.

    Returns the first entry of the known-brand list that occurs in
    ``platform``; when none occurs, ``platform`` is returned unchanged.
    """
    brands = ("iPhone", "iPad", "秒拍", "三星", "华为", "小米", "OPPO", "vivo", "魅族", "索尼", "锤子", "一加", "Android")
    return next((brand for brand in brands if brand in platform), platform)

# print platformUni(p1)
# print platformSimp(p1)
# print platformSimp(p4)


def removepeople(peopleline):
    """Drop the ``//@name:`` repost markers from a line of text.

    The line is cut at every ``//@``; from each piece only the text after
    its last ``:`` is kept, and the kept parts are concatenated in order.
    """
    kept_parts = [segment.split(":")[-1] for segment in peopleline.split("//@")]
    return "".join(kept_parts)

# print str(removepeople(line1)).decode('string_escape')


def removeurl(urlline):
    """Return ``urlline`` with every ``http://`` URL removed.

    A URL is ``http://`` followed by any run of ASCII letters, digits and
    the characters ``. ? / & = :``; other schemes are left untouched.
    """
    return re.sub(r'http://[a-zA-Z0-9.?/&=:]*', "", urlline, flags=re.S)



def removeEmoji(text):
    """Return ``text`` with emoji and pictographic symbols removed.

    Runs of code points in U+1F300-U+1F5FF, U+1F600-U+1F64F,
    U+1F680-U+1F6FF, U+2600-U+26FF and U+2700-U+27BF are deleted.
    """
    emoji_ranges = (
        u'\U0001F300-\U0001F5FF'
        u'\U0001F600-\U0001F64F'
        u'\U0001F680-\U0001F6FF'
        u'\u2600-\u26FF\u2700-\u27BF'
    )
    return re.sub(u'[' + emoji_ranges + u']+', r'', text, flags=re.UNICODE)




def get_files_from_folder(rootDir):
    """Walk ``rootDir`` recursively and print the name of every file found.

    Each file name is printed twice: once on its own and once prefixed with
    ``like you:``. Nothing is returned.
    """
    for _root, _dirs, filenames in os.walk(rootDir):
        for filename in filenames:
            # Single-argument print calls produce the same output whether
            # ``print`` is a statement or a function.
            print(filename)
            print("like you: %s" % filename)


# NOTE(review): this call runs at import time, so importing the module prints
# the files found under ../WBTestdata/proxy (resolved against the current
# working directory).
get_files_from_folder("../WBTestdata/proxy")

def creat_date_list(month,i,j):
    """Build ``"<month>-DD"`` strings for the days ``i`` .. ``j - 1``.

    ``month`` is used as given (a string); the day number is zero-padded to
    two digits. The upper bound ``j`` is exclusive.
    """
    return [month + "-" + str(day).zfill(2) for day in range(i, j)]

# print creat_date_list("04",01,15)

def process_time(input, starttime, year="2017"):
    """Convert a relative or partial timestamp label to a full date-time string.

    Args:
        input: the timestamp label. Three shapes are handled:
            * contains ``今天`` ("today"): the second space-separated token is
              taken as the clock time and ``:00`` is appended;
            * contains ``分钟前`` ("minutes ago"): the number left after
              stripping those characters is the count of minutes to subtract
              from ``starttime``;
            * anything else: presumably ``MM-DD HH:MM`` (TODO confirm against
              the caller); it is prefixed with the year and suffixed ``:00``.
        starttime: reference time as seconds since the epoch (anything
            ``float()`` accepts); used by the first two shapes only.
        year: year prefixed in the third case. Defaults to ``"2017"``.

    Returns:
        ``"YYYY-MM-DD HH:MM:SS"`` for the first two shapes (local time zone),
        or ``"<year>-<input>:00"`` for the third.
    """
    if "今天" in input:
        day = time.strftime("%Y-%m-%d", time.localtime(float(starttime)))
        return day + " " + input.split(" ")[1] + ":00"

    if "分钟前" in input:
        elapsed_seconds = 60 * float(input.strip("分钟前"))
        posted_at = time.localtime(float(starttime) - elapsed_seconds)
        return time.strftime("%Y-%m-%d %H:%M:%S", posted_at)

    return str(year) + "-" + input + ":00"



def generate_insert(month,i,j):
    """Build one ``load data local infile`` statement per day ``i`` .. ``j - 1``.

    Each statement loads ``<month>-DD.txt`` (day zero-padded to two digits)
    from the fixed directory below into table ``wbdata``. The upper bound
    ``j`` is exclusive. Returns the statements as a list of strings.
    """
    head = "load data local infile 'C:/Users/kaidi/Documents/GitHub/WBDatabase/time/"
    tail = ".txt' into table wbdata lINES TERMINATED BY '" + r'\r\n' + "';"
    return [head + month + "-" + str(day).zfill(2) + tail for day in range(i, j)]




def turn_tags_tostring(sql_result):
    """Flatten the first column of ``sql_result`` into one comma-terminated string.

    For every row, ``row[0]`` is split on single spaces; the first token is
    skipped and each remaining token is emitted followed by ``,``.
    """
    pieces = []
    for record in sql_result:
        tokens = record[0].split(" ")
        pieces.extend(token + "," for token in tokens[1:])
    return "".join(pieces)

def linear_scale(inputmin,inputmax,outputmin,outputmax,item):
    """Map ``item`` linearly from the input range onto the output range.

    ``inputmin`` maps to ``outputmin`` and ``inputmax`` to ``outputmax``;
    values outside the input range are extrapolated along the same line.
    Raises ZeroDivisionError when ``inputmax == inputmin``.
    """
    input_span = float(inputmax - inputmin)
    slope = (outputmax - outputmin) / input_span
    intercept = outputmax - slope * inputmax
    return slope * item + intercept

def wordscounter(text, n):
    """Rank the comma-separated words of ``text`` and assign display sizes.

    Args:
        text: words separated by ``,``.
        n: maximum number of ranked words to return.

    Returns:
        A list of ``{'text': word, 'size': size}`` dicts, most frequent
        first. The top word always gets size 120. The remaining words are
        scaled with ``linear_scale`` so that the second-ranked count maps to
        110 and the last-ranked count to 10; when those two counts are equal
        every remaining word gets 110.0. An empty list is returned when no
        word survives the stop-list filtering (or ``n`` selects nothing).

    Side effects:
        Prints the scaling bounds and the size computed for each word.
    """
    counts = Counter(text.split(","))

    removelist = ["秒拍", "视频", "网页", "分享","全文","链接","00","1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","21","22","23","24","26","27","28","100","25","20","30","40","50","60","70","80"]
    for word in removelist:
        counts.pop(word, None)

    rank = counts.most_common()[:n]
    if not rank:
        return []

    # The scale deliberately starts at the second-ranked word because the
    # first one is pinned to 120 below; fall back to the only entry when
    # there is no second one.
    countmax = rank[1][1] if len(rank) > 1 else rank[0][1]
    countmin = rank[-1][1]
    print("count max %s" % countmax)
    print("count min %s" % countmin)

    diclist = []
    for word, count in rank:
        if countmax == countmin:
            # Zero-width input range: linear_scale would divide by zero.
            # Use the value the second-ranked count maps to in general.
            size = 110.0
        else:
            size = linear_scale(countmin, countmax, 10, 110, count)
        print("linear %s  is  %s" % (count, size))
        diclist.append({'text': word, 'size': size})

    diclist[0]['size'] = 120
    return diclist

def dot_coor(keywordlist):
    """Look up the stored coordinates for each keyword in ``keywordlist``.

    Returns a list of ``[x, y]`` pairs, in keyword order, one for every
    table entry whose name equals a keyword. Keywords that are not in the
    table contribute nothing. Both coordinates are returned as the strings
    stored in the table, not as numbers.
    """
    # Table of [name, x, y] entries keyed by Chinese country/region name.
    # NOTE(review): the coordinates look like pixel positions on a world-map
    # image -- confirm against whatever renders them.
    points = [["南苏丹", "1547.18017578125", "785.9016723632812"], ["秘鲁", "628.4868774414062", "970.151611328125"],
              ["布基纳法索", "1270.7194213867188", "756.82666015625"], ["利比亚", "1433.2128295898438", "617.1016540527344"],
              ["白俄罗斯", "1505.9412231445312", "348.0906066894531"], ["巴基斯坦", "1866.8802490234375", "576.3596038818359"],
              ["玻利维亚", "731.4933471679688", "1040.8766174316406"], ["科特迪瓦", "1235.5302124023438", "803.7808837890625"],
              ["阿尔及利亚", "1300.9364624023438", "599.9543609619141"], ["瑞士", "1358.2608032226562", "413.7236022949219"],
              ["喀麦隆", "1392.3614501953125", "805.1266479492188"], ["马其顿", "1465.7857666015625", "464.7266540527344"],
              ["博茨瓦纳", "1497.7301635742188", "1100.9960327148438"], ["乌克兰", "1533.9552001953125", "399.18902587890625"],
              ["肯尼亚", "1617.3302001953125", "878.8703918457031"], ["约旦", "1599.5941772460938", "567.6516418457031"],
              ["马里", "1250.480224609375", "703.9266357421875"], ["刚果布", "1474.8217163085938", "918.9766235351562"],
              ["索马里", "1688.9496459960938", "827.5516662597656"], ["阿富汗", "1847.9052124023438", "541.2960815429688"],
              ["加纳", "1275.3489379882812", "799.95166015625"], ["奥地利", "1397.5364990234375", "405.50164794921875"],
              ["乌干达", "1567.30517578125", "865.0833435058594"], ["哥伦比亚", "642.3857116699219", "838.04541015625"],
              ["苏丹", "1547.18017578125", "725.9016723632812"], ["伊拉克", "1654.7052001953125", "548.4210968017578"],
              ["尼日尔", "1354.8746337890625", "703.5101928710938"], ["拉脱维亚", "1477.6051635742188", "318.1016540527344"],
              ["罗马尼亚", "1489.2547607421875", "422.4322052001953"], ["赞比亚", "1527.4864501953125", "1009.882568359375"],
              ["埃塞俄比亚", "1638.6051635742188", "787.8766479492188"], ["危地马拉", "499.53016662597656", "721.7516479492188"],
              ["苏里南", "792.5490417480469", "839.5322265625"], ["捷克", "1413.8975219726562", "385.37664794921875"],
              ["乍得", "1447.7052001953125", "724.64404296875"], ["阿尔巴尼亚", "1453.6119384765625", "469.9845886230469"],
              ["叙利亚", "1611.0051879882812", "532.3348999023438"], ["吉尔吉斯", "1893.4739379882812", "469.32666015625"],
              ["哥斯达黎加", "547.6864013671875", "783.1156311035156"], ["巴拉圭", "784.1551513671875", "1111.7989501953125"],
              ["波兰", "1440.8941650390625", "365.10789489746094"], ["纳米比亚", "1445.4052124023438", "1107.0016479492188"],
              ["南非", "1496.5802001953125", "1162.0579223632812"], ["埃及", "1544.1878662109375", "613.6516418457031"],
              ["格鲁吉亚", "1638.605224609375", "458.00592041015625"], ["波斯尼亚", "1432.18017578125", "442.62109375"],
              ["萨尔瓦多", "509.4618682861328", "741.62109375"], ["圭亚那", "766.3301696777344", "830.4266662597656"],
              ["比利时", "1329.8302001953125", "378.3937072753906"], ["莱索托", "1525.9052124023438", "1173.6563720703125"],
              ["保加利亚", "1494.4239501953125", "454.1384735107422"], ["布隆迪", "1546.6051635742188", "912.63427734375"],
              ["吉布提", "1656.5615234375", "761.3627624511719"], ["乌拉圭", "821.9441528320312", "1202.4515991210938"],
              ["刚果金", "1414.93017578125", "885.2002868652344"], ["卢旺达", "1546.268310546875", "898.1609497070312"],
              ["亚美尼亚", "1655.2801513671875", "480.0759582519531"], ["塞内加尔", "1159.0552368164062", "734.4845886230469"],
              ["多哥", "1291.3052368164062", "793.0516357421875"], ["匈牙利", "1445.0857543945312", "410.6592712402344"],
              ["马拉维", "1583.261474609375", "1010.9766235351562"], ["塔吉克斯坦", "1871.7183227539062", "492.0933532714844"],
              ["冰岛", "1178.7619018554688", "244.93289184570312"], ["尼加拉瓜", "540.6126098632812", "750.9328918457031"],
              ["摩洛哥", "1227.1823120117188", "562.7322082519531"], ["利比里亚", "1201.2857666015625", "814.9016418457031"],
              ["中非", "1467.8302001953125", "812.6016540527344"], ["斯洛伐克", "1446.1369018554688", "396.30165100097656"],
              ["立陶宛", "1473.5802001953125", "334.2190399169922"], ["津巴布韦", "1537.5450439453125", "1068.1397705078125"],
              ["以色列", "1580.6739501953125", "566.5016479492188"], ["老挝", "2185.09326171875", "697.6016540527344"],
              ["朝鲜", "2317.68017578125", "477.95164489746094"], ["土库曼斯坦", "1773.730224609375", "491.3203887939453"],
              ["贝宁", "1304.5302124023438", "786.4884643554688"], ["斯洛文尼亚", "1410.9052124023438", "420.4122772216797"],
              ["摩尔多瓦", "1514.4052124023438", "412.702392578125"], ["尼泊尔", "2001.2691650390625", "595.9703979492188"],
              ["斯威士兰", "1555.2301635742188", "1142.8897705078125"], ["蒙古", "2098.3435668945312", "414.12664794921875"],
              ["不丹", "2056.3107299804688", "605.6016540527344"], ["法国", "1307.8234252929688", "415.20445251464844"],
              ["印尼", "2318.830078125", "903.45166015625"], ["也门", "1707.0301513671875", "723.4766540527344"],
              ["马达加斯加", "1690.3551635742188", "1065.6016540527344"], ["台湾", "2313.6551513671875", "644.1266479492188"],
              ["墨西哥", "411.55516052246094", "644.1266479492188"], ["阿联酋", "1747.8551635742188", "636.6516723632812"],
              ["伯利兹", "516.7801666259766", "707.3766479492188"], ["巴西", "807.4107360839844", "1020.2284851074219"],
              ["塞拉利昂", "1180.9051513671875", "794.7766418457031"], ["意大利", "1393.0801391601562", "462.4266357421875"],
              ["孟加拉", "2060.0802001953125", "642.9766540527344"], ["多米尼加", "678.93017578125", "691.2766418457031"],
              ["几内亚比绍", "1152.1551513671875", "761.4266357421875"], ["瑞典", "1421.8302001953125", "270.95164489746094"],
              ["土耳其", "1578.8051147460938", "491.1766357421875"], ["莫桑比克", "1593.1801147460938", "1064.45166015625"],
              ["日本", "2399.3302001953125", "532.5766448974609"], ["新西兰", "2652.9051513671875", "1301.9266967773438"],
              ["古巴", "601.30517578125", "664.82666015625"], ["委内瑞拉", "701.3551635742188", "814.32666015625"],
              ["葡萄牙", "1226.81982421875", "485.0898132324219"], ["毛里塔尼亚", "1191.8301391601562", "670.0016479492188"],
              ["安哥拉", "1440.8051147460938", "990.8516540527344"], ["德国", "1375.255126953125", "372.7266540527344"],
              ["泰国", "2165.8802490234375", "748.7766418457031"], ["澳大利亚", "2418.880126953125", "1146.6766662597656"],
              ["新几内亚", "2585.6302490234375", "943.1266479492188"], ["克罗地亚", "1422.9801635742188", "436.55165100097656"],
              ["丹麦", "1383.3051147460938", "325.00164794921875"], ["伊朗", "1736.93017578125", "556.1516418457031"],
              ["缅甸", "2118.1551513671875", "687.82666015625"], ["芬兰", "1478.755126953125", "246.22664642333984"],
              ["所罗门群岛", "2698.3302001953125", "970.7266235351562"], ["阿曼", "1768.55517578125", "664.8266296386719"],
              ["巴拿马", "582.3301391601562", "795.3516540527344"], ["阿根廷", "761.7301635742188", "1258.8016967773438"],
              ["英国", "1274.05517578125", "332.4766387939453"], ["几内亚", "1184.3551635742188", "779.8266296386719"],
              ["爱尔兰", "1234.9551391601562", "350.87664794921875"], ["尼日利亚", "1360.30517578125", "789.0266723632812"],
              ["突尼斯", "1368.3551025390625", "542.3516540527344"], ["坦桑尼亚", "1589.1551513671875", "941.9766540527344"],
              ["沙特", "1672.5301513671875", "637.8016357421875"], ["越南", "2201.5302734375", "720.0266418457031"],
              ["俄罗斯", "2004.30517578125", "291.65164947509766"], ["海地", "652.4801635742188", "690.1266479492188"],
              ["印度", "1992.2301635742188", "668.2766418457031"], ["加拿大", "653.6301574707031", "283.6016616821289"],
              ["赤道几内亚", "1370.0801391601562", "855.7266235351562"],
              ["阿塞拜疆", "1675.9801635742188", "479.67665100097656"], ["马来西亚", "2245.80517578125", "835.6016540527344"],
              ["菲律宾", "2351.605224609375", "751.6516418457031"], ["塞尔维亚", "1458.05517578125", "441.7266540527344"],
              ["黑山共和国", "1444.8302001953125", "454.37664794921875"],
              ["爱沙尼亚", "1478.1801147460938", "302.57664489746094"],
              ["西班牙", "1267.8739013671875", "481.9766540527344"], ["加蓬", "1386.18017578125", "886.7766723632812"],
              ["柬埔寨", "2199.80517578125", "755.6766357421875"], ["韩国", "2341.8302001953125", "522.8016662597656"],
              ["洪都拉斯", "534.0301666259766", "732.6766357421875"], ["智利", "700.9551696777344", "1232.3516845703125"],
              ["荷兰", "1336.1551513671875", "363.5266418457031"], ["斯里兰卡", "1992.2301635742188", "801.1016540527344"],
              ["希腊", "1486.2301635742188", "497.50164794921875"], ["厄瓜多尔", "597.8551635742188", "896.5516357421875"],
              ["挪威", "1380.2225341796875", "260.33458709716797"], ["黎巴嫩", "1586.8551635742188", "541.7766723632812"],
              ["厄立特里亚", "1631.7051391601562", "728.6516723632812"], ["美国", "524.3471527099609", "508.9126739501953"],
              ["哈萨克斯坦", "1809.3801879882812", "403.20164489746094"],
              ["乌兹别克斯坦", "1807.6551513671875", "467.6016540527344"], ["斐济", "2831.730224609375", "1051.8016357421875"],
              ["科威特", "1689.7801513671875", "587.20166015625"], ["东帝汶", "2384.3802490234375", "967.8516540527344"],
              ["巴哈马", "636.9551391601562", "640.6766662597656"], ["瓦努阿图", "2742.605224609375", "1047.7766418457031"],
              ["冈比亚", "1151.0051879882812", "744.7516479492188"], ["卡塔尔", "1723.130126953125", "626.3016357421875"],
              ["牙买加", "615.6801452636719", "697.6016540527344"], ["塞浦路斯", "1565.5801391601562", "530.8516540527344"],
              ["巴勒斯坦", "1585.130126953125", "561.3266296386719"], ["文莱", "2288.355224609375", "840.20166015625"],
              ["特立尼达和多巴哥", "747.93017578125", "773.5016479492188"], ["佛得角", "1076.255126953125", "719.4516296386719"],
              ["萨摩亚", "2921.43017578125", "1013.8516540527344"], ["卢森堡", "1341.9051513671875", "387.1016540527344"],
              ["科摩罗", "1667.3551635742188", "997.7516479492188"], ["毛里求斯", "1778.9051513671875", "1078.2516479492188"],
              ["圣多美和普林西比", "1344.2051391601562", "870.1016540527344"],
              ["多米尼克", "757.1301879882812", "724.0516662597656"], ["汤加", "2878.880126953125", "1088.6016845703125"],
              ["基里巴斯", "1359.1551361083984", "859.7516479492188"],
              ["密克罗尼西亚", "2672.4552001953125", "805.7016296386719"], ["巴林", "1715.0802001953125", "615.9516296386719"],
              ["安道尔", "1303.9551391601562", "456.1016540527344"], ["帕劳", "2459.130126953125", "799.9516296386719"],
              ["塞舌尔", "1772.5802001953125", "919.5516662597656"], ["安提瓜和巴布达", "755.4051513671875", "709.1016540527344"],
              ["巴巴多斯", "770.93017578125", "748.2016296386719"],
              ["圣文森特和格林纳丁斯", "754.2551574707031", "751.6516418457031"],
              ["圣卢西亚", "748.5051574707031", "737.8516540527344"], ["格林纳达", "743.3301391601562", "759.7016296386719"],
              ["马耳他", "1409.7551879882812", "522.8016662597656"], ["马尔代夫", "1874.3551635742188", "844.8016662597656"],
              ["圣基茨和尼维斯", "732.4051818847656", "713.70166015625"], ["马绍尔群岛", "2710.4051513671875", "825.2516479492188"],
              ["列支敦士登", "1367.2051391601562", "411.25164794921875"], ["圣马力诺", "1391.93017578125", "443.45164489746094"],
              ["图瓦卢", "2845.5302734375", "959.8016662597656"], ["瑙鲁", "2735.130126953125", "857.4516296386719"],
              ["摩纳哥", "1348.8051147460938", "443.45164489746094"], ["梵蒂冈", "1391.93017578125", "463.00164794921875"],
              ["新加坡", "2197.505126953125", "865.5016479492188"]]
    coorlist = []
    # Linear scan of the whole table for every keyword; a keyword with no
    # matching entry simply adds nothing to the result.
    for keyword in keywordlist:
        for point in points:
            # A fresh pair is built for each candidate and only kept on a match.
            coor = []
            if keyword == point[0]:
                coor.append(point[1])
                coor.append(point[2])
                coorlist.append(coor)
    return coorlist












